In [1]:
#NOTE TIL DIG SELV. 

#OVERVEJ OG SKIFT TILBAGE TIL TIDL. DATASÆT. DET NYE FUNGERER STADIG IKKE OG VILLE TAGE TID AT FÅ STYR PÅ. 
#DET GAMLE HAR KUN DATA FOR EUROPA, VED MINDRE DET ER FOR DE SIDSTE PAR ÅR.
In [2]:
import random

import folium
from folium import plugins
from folium.plugins import HeatMap

from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.io import output_file, output_notebook
from bokeh.models import Legend
from bokeh.transform import dodge
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import Category20b
from bokeh.transform import factor_cmap
from sklearn import preprocessing, tree, metrics

import pandas as pd
import numpy as np
from scipy import stats
import scipy, pylab
import seaborn as sns
import matplotlib.pyplot as plt
import collections
import os
import datetime as dt
from scipy.stats import ttest_ind
from bokeh.plotting import figure, output_file, show
from bokeh.io import curdoc, show, output_notebook
from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot,  HoverTool
from bokeh.models.widgets import Panel, Tabs

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import train_test_split

from bokeh.resources import INLINE
import bokeh.io
import math

bokeh.io.output_notebook(INLINE)
Loading BokehJS ...
In [3]:
df_death = pd.read_csv(r'death-rates-total-air-pollution.csv')
df_pop = pd.read_csv(r'worldcities.csv')
In [4]:
death_rate = df_death['Deaths - Cause: All causes - Risk: Air pollution - Sex: Both - Age: Age-standardized (Rate)'].groupby(df_death['Year']).mean()
In [5]:
years = ['2021Q1', '2021Q2', '2021Q3', '2021Q4', '2020Q1', '2020Q2', '2020Q3', '2020Q4',
         '2019Q1', '2019Q2', '2019Q3', '2019Q4', '2018H1', '2017H1', '2016H1', '2015H1']
In [6]:
df = pd.DataFrame()  

for i in range(len(years)-1):
    filename = 'downloaded'+years[i]+'.csv'
    
    df1 = pd.read_csv(filename, skiprows=4)

    df = pd.concat([df,df1], ignore_index=True)
In [7]:
df_pm25 = df[df['Specie'] =='pm25']
city_list = np.unique(df_pm25['City'])
df_pm25['Date'] = pd.to_datetime(df['Date'])
list_count = []
city_count = []
for i in range(len(city_list)):
    country1 = df_pm25[df_pm25['City'] == city_list[i]]
    resampled = country1.resample('Y', on='Date', origin='start').mean()
    resampled = resampled[resampled['median'].notna()]
    resampled = resampled.reset_index()
    if len(resampled) > 6:
        list_count.append(resampled)
        city_count.append(city_list[i])
C:\Users\Bruger\AppData\Local\Temp/ipykernel_9052/3714203112.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pm25['Date'] = pd.to_datetime(df['Date'])
In [8]:
list_count_new = []
city_count_new = []
df_pop.drop_duplicates(subset ="city_ascii", keep = False, inplace = True)

for i in range(len(list_count)):
    if city_count[i] in list(df_pop['city_ascii']):
        if float(df_pop[df_pop['city_ascii'] == city_count[i]]['population']) > 1000000:
            list_count_new.append(list_count[i])
            city_count_new.append(city_count[i])

list_count = list_count_new
city_count = city_count_new
In [9]:
median_ave=[]
cities = []
for i in range(len(list_count)):
    median_ave.append(np.mean(list_count[i]['median']))
    cities.append(city_count[i])
In [10]:
aq_list_pm25 = []
lon_list_pm25 = []
lat_list_pm25 = []
city_list_pm25 = []
pop_pm25 = []
df_pop.drop_duplicates(subset ="city_ascii", keep = False, inplace = True)


for i in range(len(list_count)):
    if cities[i] in list(df_pop['city_ascii']):

        aq_list_pm25.append(median_ave[i])
        lon_list_pm25.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['lng']))
        lat_list_pm25.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['lat']))
        city_list_pm25.append(cities[i])
        pop_pm25.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['population']))

###
In [11]:
###
df_pm25 = df[df['Specie'] =='pm10']
city_list = np.unique(df_pm25['City'])
df_pm25['Date'] = pd.to_datetime(df['Date'])
list_count = []
city_count = []
for i in range(len(city_list)):
    country1 = df_pm25[df_pm25['City'] == city_list[i]]
    resampled = country1.resample('Y', on='Date', origin='start').mean()
    resampled = resampled[resampled['median'].notna()]
    resampled = resampled.reset_index()
    if len(resampled) > 6:
        list_count.append(resampled)
        city_count.append(city_list[i])
C:\Users\Bruger\AppData\Local\Temp/ipykernel_9052/929927433.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pm25['Date'] = pd.to_datetime(df['Date'])
In [12]:
list_count_new = []
city_count_new = []
df_pop.drop_duplicates(subset ="city_ascii", keep = False, inplace = True)


for i in range(len(list_count)):
    if city_count[i] in list(df_pop['city_ascii']):
        if float(df_pop[df_pop['city_ascii'] == city_count[i]]['population']) > 1000000:
            list_count_new.append(list_count[i])
            city_count_new.append(city_count[i])

list_count = list_count_new
city_count = city_count_new
In [13]:
median_ave=[]
cities = []
for i in range(len(list_count)):
    median_ave.append(np.mean(list_count[i]['median']))
    cities.append(city_count[i])
In [14]:
aq_list_pm10 = []
lon_list_pm10 = []
lat_list_pm10 = []
city_list_pm10 = []
pop_pm10 = []
df_pop.drop_duplicates(subset ="city_ascii", keep = False, inplace = True)


for i in range(len(list_count)):
    if cities[i] in list(df_pop['city_ascii']):

        aq_list_pm10.append(median_ave[i])
        lon_list_pm10.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['lng']))
        lat_list_pm10.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['lat']))
        city_list_pm10.append(cities[i])
        pop_pm10.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['population']))

###
In [15]:
###
df_pm25 = df[df['Specie'] =='no2']
city_list = np.unique(df_pm25['City'])
df_pm25['Date'] = pd.to_datetime(df['Date'])
list_count = []
city_count = []
for i in range(len(city_list)):
    country1 = df_pm25[df_pm25['City'] == city_list[i]]
    resampled = country1.resample('Y', on='Date', origin='start').mean()
    resampled = resampled[resampled['median'].notna()]
    resampled = resampled.reset_index()
    if len(resampled) > 6:
        list_count.append(resampled)
        city_count.append(city_list[i])
C:\Users\Bruger\AppData\Local\Temp/ipykernel_9052/3443812454.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pm25['Date'] = pd.to_datetime(df['Date'])
In [16]:
list_count_new = []
city_count_new = []
df_pop.drop_duplicates(subset ="city_ascii", keep = False, inplace = True)


for i in range(len(list_count)):
    if city_count[i] in list(df_pop['city_ascii']):
        if float(df_pop[df_pop['city_ascii'] == city_count[i]]['population']) > 1000000:
            list_count_new.append(list_count[i])
            city_count_new.append(city_count[i])

list_count = list_count_new
city_count = city_count_new
In [17]:
median_ave=[]
cities = []
for i in range(len(list_count)):
    median_ave.append(np.mean(list_count[i]['median']))
    cities.append(city_count[i])
In [18]:
aq_list_no2 = []
lon_list_no2 = []
lat_list_no2 = []
city_list_no2 = []
pop_no2 = []
df_pop.drop_duplicates(subset ="city_ascii", keep = False, inplace = True)


for i in range(len(list_count)):
    if cities[i] in list(df_pop['city_ascii']):

        aq_list_no2.append(median_ave[i])
        lon_list_no2.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['lng']))
        lat_list_no2.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['lat']))
        city_list_no2.append(cities[i])
        pop_no2.append(float(df_pop[df_pop['city_ascii'] == cities[i]]['population']))

###
In [ ]:
 
In [149]:
import folium
from folium import plugins
from folium.plugins import MarkerCluster
from folium.plugins import BeautifyIcon
import matplotlib.cm as cm

import branca
import branca.colormap as cm
from bokeh.models import ColorBar, LogColorMapper
from bokeh.plotting import figure, output_file, show
# from folium import LayerControl
# from folium.plugins import FeatureSubGroup

m = folium.Map(location=[30,0], tiles=None, position='relative', zoom_start=2)

folium.TileLayer('cartodbpositron', name='Controls').add_to(m)

colormap = cm.LinearColormap(colors=['green','red'], index=None,vmin=min(aq_list_pm25),vmax=max(aq_list_pm25))

colormap = colormap.to_step(index=[0, 20, 40, 60, 80, 100])
colormap.caption = 'Concentration (µg/m3)'
colormap.add_to(m)
    
    
all_m = folium.FeatureGroup(name='All',show =True)
m.add_child(all_m)

pm10_m = plugins.FeatureGroupSubGroup(all_m, 'PM10',show =True)
m.add_child(pm10_m)

pm25_m = plugins.FeatureGroupSubGroup(all_m, 'PM25',show =False)
m.add_child(pm25_m)

no2_m = plugins.FeatureGroupSubGroup(all_m, 'NO2',show =False)
m.add_child(no2_m)

folium.LayerControl(collapsed = False).add_to(m)

colormap = cm.LinearColormap(colors=['green','red'],vmin=0,vmax=100)

for i in range(len(aq_list_pm25)):

    text = f"City: <strong>{(city_list_pm25[i])}</strong></br> \Concentration (µg/m3): <strong>{round(aq_list_pm25[i],2)}</strong></br> \Population (mio.): <strong>{round(pop_pm25[i]/1000000,2)}</strong></br>"
    if pop_pm25[i] > 1500000:
        radius = pop_pm25[i]/1000000
    else: 
        radius = 1.5
        
    folium.CircleMarker(
        location=[lat_list_pm25[i],lon_list_pm25[i]],
        tooltip=text,
        radius=radius,
        fill=True,
        fill_color=colormap(aq_list_pm25[i]),
        fill_opacity = 0.5,
        stroke = False).add_to(pm25_m)

###    

colormap = cm.LinearColormap(colors=['green','red'],vmin=0,vmax=100)

for i in range(len(aq_list_pm10)):

    text = f"City: <strong>{(city_list_pm10[i])}</strong></br> \Concentration (µg/m3): <strong>{round(aq_list_pm10[i],2)}</strong></br> \Population (mio.): <strong>{round(pop_pm10[i]/1000000,2)}</strong></br>"
    if pop_pm10[i] > 1500000:
        radius = pop_pm10[i]/1000000
    else: 
        radius = 1.5
    folium.CircleMarker(
        
    location=[lat_list_pm10[i],lon_list_pm10[i]],
    tooltip=text,
    radius=radius,
    fill=True,
    fill_color=colormap(aq_list_pm10[i]),
    fill_opacity = 0.5,
    stroke = False).add_to(pm10_m)
    

colormap = cm.LinearColormap(colors=['green','red'],vmin=0,vmax=100)

for i in range(len(aq_list_no2)):

    text = f"City: <strong>{(city_list_no2[i])}</strong></br> \Concentration (µg/m3): <strong>{round(aq_list_no2[i],2)}</strong></br> \Population (mio.): <strong>{round(pop_no2[i]/1000000,2)}</strong></br>"
    if pop_no2[i] > 1500000:
        radius = pop_no2[i]/1000000
    else: 
        radius = 1.5
    folium.CircleMarker(
        location=[lat_list_no2[i],lon_list_no2[i]],
        tooltip=text,
        radius=radius,
        fill=True,
        fill_color=colormap(aq_list_no2[i]),
        fill_opacity = 0.5,
        stroke = False).add_to(no2_m)
    
m
Out[149]:
Make this Notebook Trusted to load map: File -> Trust Notebook